#! /usr/bin/env python
# -*- coding: utf-8 -*-

# *************************************************************
#     Filename @  crawl.py
#       Author @  Huoty
#  Create date @  2015-08-01 16:07:56
#  Description @  
# *************************************************************

from sys import argv
from os import makedirs, unlink, sep
from os.path import dirname, exists, isdir, splitext
from string import replace, find, lower
from htmllib import HTMLParser
from urllib import urlretrieve
from urlparse import urlparse, urljoin
from formatter import DumbWriter, AbstractFormatter
from cStringIO import StringIO

# Script starts from here

class Retriever(object):
    '''
    Download Web pages and map each URL to a local file path.

    NOTE(review): written for Python 2 (see the urlparse/htmllib/cStringIO
    imports above); the class body continues beyond this chunk.
    '''
    def __init__(self, url):
        # Remember the target URL and precompute the local file path it
        # maps to (delegated to filename()).
        self.url = url
        self.file = self.filename(url)

    def filename(self, url, deffile='index.htm'):
        '''
        Derive a local file path from *url*; when the URL path has no
        file extension, append *deffile* as the file name.

        NOTE(review): the method continues past this chunk, so no
        return statement is visible here.
        '''
        # BUGFIX: was assigned to 'pareedurl' (typo) while the next
        # line read 'parsedurl' -> NameError at runtime.
        parsedurl = urlparse(url, 'http:', 0)  # parse the URL
        path = parsedurl[1] + parsedurl[2]     # netloc + path component
        ext = splitext(path)
        if ext[1] == '':  # no file extension, use default file name
            if path[-1] == '/':
                path += deffile
            else:
                path += '/' + deffile
